Dogs:
intake_stacked <- function(foo, ordr_in, var){
# create data frame by yy-mm and counts of outcomes
foo <- foo %>%
filter(!!sym(var) %in% ordr_in) %>% # too few
mutate(fact_in = factor(!!sym(var), levels = ordr_in)) %>%
mutate(Time = format(as.Date(intake_date), "%Y-%m")) %>%
group_by(Time, fact_in, .drop=FALSE) %>%
summarise(n = length(fact_in), .groups='drop_last') %>%
mutate(Percentage = n / sum(n)) # for plotting
p <- ggplot(foo, aes(x=Time, y=Percentage, fill=fact_in, group=fact_in,
text = paste('Percentage:', round(Percentage,2),'\n','Type:', fact_in)))+
geom_area(alpha=0.6 , size=0.5, colour="black")+
theme(panel.border=element_rect(colour="black", fill= NA)) +
theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust=1))+
scale_fill_discrete(name = "Intake Type")
return(ggplotly(p, tooltip = c('text')) %>% config(displayModeBar = FALSE))
}
# filtering 100+ animals: removed pub assist, transfer, wildlife
ordr_in_filt <- c('QUARANTINE', 'RETURN', 'DISPO REQ', 'CONFISCATE', 'OWNER SUR', 'STRAY')
#ordr_in <- names(sort(table(foo$src_intake_type))) # all categories
intake_stacked(foo %>% filter(species == 'Dog'), ordr_in_filt, 'src_intake_type')
Cats:
intake_stacked(foo %>% filter(species == 'Cat'), ordr_in_filt, 'src_intake_type')
Absolute:
# absolute intake - makes much more sense for the overall one, whereas relative are good within-species
var <- 'species'
by_month <- foo %>%
mutate(fact_in = factor(!!sym(var), levels=rev(c('Dog', 'Cat', 'Other')))) %>%
mutate(Time = format(as.Date(intake_date), "%Y-%m")) %>%
group_by(Time, fact_in, .drop=FALSE) %>%
summarise(n = length(fact_in), .groups='drop_last')
ggplot(by_month, aes(x=Time, y=n, group=fact_in, fill=fact_in))+
geom_col(alpha=0.6 , size=0.5, colour="black", position='dodge')+
theme(panel.border=element_rect(colour="black", fill= NA)) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
labs(y = 'Count') +
scale_fill_discrete(name = "Intake Type")
Extract day of week
# add the variable
foo$weekday <- weekdays(foo$intake_date)
foo$weekday_ord <- factor(foo$weekday, # Change ordering manually
levels = c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"))
# function for plotting
day_volume <- function(foo){
plot_data <- foo %>% count(weekday_ord)
ggplot(plot_data) + geom_col(aes(x = weekday_ord, y=n)) +
labs(x = 'Day', y = 'Count')+
geom_text(aes(x = weekday_ord, label = sprintf("%.f", n), y= n), vjust=2, colour="white", size=4)
}
day_volume(foo)
And now separately for dogs:
Use the simulation-based approach covered in class (the arm library, etc.) to find the computational 95% confidence interval of your coefficients and report them here. Set the number of simulations to 10,000.
day_volume(foo %>% filter(species == 'Dog'))
And cats:
day_volume(foo %>% filter(species == 'Cat'))
Answer verbally
summary(is.na(foo))
## ï..animal_id species dob intake_date
## Mode :logical Mode :logical Mode :logical Mode :logical
## FALSE:8668 FALSE:8668 FALSE:8634 FALSE:8668
## TRUE :34
## src_intake_type src_intake_subtype src_intake_reason src_primary_breed
## Mode :logical Mode :logical Mode :logical Mode :logical
## FALSE:8668 FALSE:7970 FALSE:5157 FALSE:8668
## TRUE :698 TRUE :3511
## src_finders_zip_code src_found_zip_code weekday weekday_ord
## Mode :logical Mode :logical Mode :logical Mode :logical
## FALSE:8668 FALSE:8388 FALSE:8668 FALSE:8668
## TRUE :280
# subtype 698, reason 3511, dob 34, found zip 280
table(foo$src_finders_zip_code) # 160 with
##
## 0 85035 85145 85321 85337 85601 85602 85614 85619 85622 85629 85631 85634
## 160 1 1 105 4 1 4 8 1 10 45 12 6
## 85641 85645 85653 85658 85701 85704 85705 85706 85708 85709 85710 85711 85712
## 85 3 124 6 42 125 507 337 10 1 239 254 167
## 85713 85714 85715 85716 85718 85719 85730 85735 85736 85737 85739 85741 85742
## 366 109 24 146 64 189 167 111 84 27 35 189 85
## 85743 85745 85746 85747 85748 85749 85750 85754 85755 85756 85757 97403
## 153 3743 347 43 45 21 34 1 23 216 187 1
# load geometry
geometry <- readRDS('zips.rds')
# just counts
finder_count <- foo %>% filter(src_finders_zip_code!=0) %>% group_by(zip=src_finders_zip_code) %>%
summarise(count = length(src_finders_zip_code), .groups='keep')
found_count <- foo %>% group_by(zip=src_found_zip_code) %>%
summarise(count = length(src_found_zip_code), .groups='keep')
# Merge the counts and name properly
countDF <- inner_join(finder_count,found_count, by='zip')
colnames(countDF) = c('zip', 'countFinder', 'countFound')
countDF$zip = as.character(countDF$zip)
count_sf <- geometry %>% inner_join(countDF, by = "zip")
fix_sf <- function(old_sf){
new_sf <- old_sf %>% st_transform(4326)
names(st_geometry(new_sf)) = NULL
#return (old_sf) # raises warnings that suggests to do the below lines, but still works
return(st_transform(old_sf, '+proj=longlat +datum=WGS84')) # this works well locally but fails there
#return (new_sf) # this works without warning locally, but not on shinyapps
}
pal <- colorBin(palette='Purples', domain = count_sf$countFound, bins = c(0, 250, 500, 750, 1000))
label <- sprintf("<strong>%s</strong><br/>%g %s", count_sf$zip, count_sf$countFound, 'Found Animals') %>%
lapply(htmltools::HTML)
#layer_count <- new_layer(count_sf$countFound, count_sf$zip, 'Strays', c(0, 100, 200, 500, 1000))
leaflet() %>%
addTiles() %>%
setView(lat = 30.2692, lng = -97.7437, zoom=11) %>%
addPolygons(data=fix_sf(count_sf), group='Found', fillColor=~pal(countFound),
fillOpacity = 0.7, color='grey', weight = 1, opacity = 0.4, label = label,
highlightOptions = highlightOptions(color = "black",weight = 2, bringToFront = TRUE))